
********************************************************************************
* Create a dataset with basic individual characteristics for the full Swedish 
* population 2017 
********************************************************************************

* Retrieve table "fodelseland" from SQL database
odbc load, exec("select * from fodelseland")dsn("P0846") clear
duplicates drop P0846_LopNr_PersonNr, force
rename P0846_LopNr_PersonNr PersonId
gen Foreign = 1
replace Foreign = 0 if Varldsdelnamn == "Sverige"
tab Foreign 

save Data/data_country_of_origin, replace

* Retrieve table "individ_2017" from SQL database
odbc load, exec("select * from individ_2017")dsn("P0846") clear

* Save raw data so I do not need to use odbc each time (which is slow)
save Data/ODBC_individ_2017, replace
use Data/ODBC_individ_2017, clear

keep P0846_LopNr_personnr P0846_lopnr_PeOrgNr P0846_lopnr_CfarNr Civil FodelseAr Kon LoneInk Sun*niva AstKommun Barn* Ssyk4_2012_J16

* There are a few duplicates with equal values of all variables.
* Keep the first occurrence in these cases
duplicates drop

* Use better variable names
rename P0846_LopNr_personnr PersonId
rename P0846_lopnr_CfarNr WorkplaceId

* Not a valid WorkplaceId 
replace WorkplaceId = . if WorkplaceId ==  80491
egen num_of_workers = count(PersonId) if WorkplaceId != ., by(WorkplaceId)

* Add i variable measuring whether foreign born
merge 1:1 PersonId using Data/data_country_of_origin
keep if _merge == 3
drop _merge

* Add birth year, month and day
merge 1:1 PersonId using Data/data_basic_X
keep if _merge == 3
drop _merge

* A measure of being employed based on income (income is measured in 100s of SEK)
rename LoneInk Income
destring Income, replace
hist Income if Income > 1000 & Income < 10000
gen LogIncome = log(Income)
gen Employed = Income > 1000 & Income != .
tab Employed, miss

* Gender
gen Female = Kon == "2"
tab Female, miss
drop Kon

* Age
destring FodelseAr, replace
gen Age2017 = 2017 - FodelseAr
drop FodelseAr

* Education based on Sun2000niva (see LISA documentation)
tab Sun*niva, miss
destring Sun*niva, replace
rename Sun*niva sunnivå
gen edu = trunc(sunnivå/100)
tab edu, miss
gen EduLessHighSchool = edu <= 2 
gen EduHighSchool = edu >= 3 & edu != 9
gen EduCollege = edu >= 4 & edu != 9
gen EduMiss = edu == 9
drop edu
drop sunnivå

* Married 
tab Civil, miss 
replace Civil = "W" if Civil != "EP" & Civil != "G" & Civil != "OG" & Civil != "RP" & Civil != "S" & Civil != "SP" 
gen Married = (Civil == "G" | Civil == "W" | Civil == "RP" | Civil == "EP")
tab Married, miss
gen Married2 = Civil == "G"
drop Civil

* Age when outcome is measured in 2022 etc
gen Age2010 = Age2017 - 7 
gen Age2018 = Age2017 + 1
gen Age2022 = Age2018 + 4

* Municipality of workplace
destring AstKommun, replace

* Number of children
egen NumberOfChildren = rowtotal(Barn0_3 Barn11_15 Barn16_17 Barn18_19 Barn20plus Barn4_6 Barn7_10)
tab NumberOfChildren
drop Barn*

* Add DESO 2017 (neighbourhood codes)
merge 1:1 PersonId using Data/data_DESO_Bostad_2017, keepusing(deso)
drop if _merge == 2
drop _merge
replace deso = "99999999" if deso == ""

* Add SNI industry codes and other firm variables 
merge m:1 P0846_lopnr_PeOrgNr using Data/data_firm_X_2017
drop if _merge == 2
drop _merge
rename SNI_Section WP_SNI_Section
rename Org_Sni2007 WP_Org_Sni2007
rename P0846_lopnr_PeOrgNr WP_P0846_lopnr_PeOrgNr

* Occupation at the one digit level
replace Ssyk4_2012_J16 = "" if Ssyk4_2012_J16 == "****"
destring Ssyk4_2012_J16, replace
gen Occ1 = trunc(Ssyk4_2012_J16/1000)
tab Occ1, miss

compress
order PersonId deso Occ1 WP_Org_Sni2007 WP_SNI_Section WP_P0846_lopnr_PeOrgNr WorkplaceId birth_year birth_month birth_day
order PersonId-Age2017 NumberOfChildren
save Data/data_ind_X_2017, replace

* Save a data set which can be used to add on mother and father characteristics
* Mothers
use Data/data_ind_X_2017, clear
keep PersonId Age2017 Married Married2 NumberOfChildren LogIncome Income Female EduCollege

* Mothers
preserve
 keep if Female == 1
 drop Female
 rename PersonId MorId
 foreach parent_X of varlist Age2017 Married Married2 NumberOfChildren LogIncome  Income EduCollege   {
  rename `parent_X' Mother`parent_X' 
 }
 save Data/data_mothers_X_2017, replace
restore
		
* Fathers
keep if Female == 0
drop Female
rename PersonId FarId
foreach parent_X of varlist Age2017 Married Married2 NumberOfChildren LogIncome  Income EduCollege   {
 rename `parent_X' Fathers`parent_X' 
}
save Data/data_fathers_X_2017, replace


